## Warning: package 'GenomeInfoDb' was built under R version 4.0.4
load("~/Documents/MiGASti/Databases/gene_matrix.RData")
metadata <- read.table("~/Documents/MiGASti/Databases/metadata.txt")
#set rownames to Sample
row.names(metadata) <- metadata$Sample 
setwd("~/Documents/MiGASti/Databases")
exclude <- read.table("samples2remove.txt")
exclude <- exclude$x
genes_tpm_filt = genes_tpm[, !colnames(genes_tpm) %in% exclude] 
#Excludes the samples from filters. 
#dim(genes_tpm_filt)
metadata_filt = metadata[ !(rownames(metadata) %in% exclude), ]
length(metadata_filt)
#check for expression of markers in dataset
markers = "~/Documents/MiGASti/Databases/Markers.xlsx"
markers = read_excel(markers, col_names = TRUE) 
markers = as.data.frame(markers)
gencode_30 = read.table("~/Documents/MiGASti/Databases/ens.geneid.gencode.v30")
colnames(gencode_30) = c("ensembl","symbol")
genes_tpm_filt = log2((genes_tpm_filt) + 1)
genes_tpm_filt <- as.data.frame(genes_tpm_filt)
setDT(genes_tpm_filt, keep.rownames = "ensembl")
genes_tpm_filt$ensembl
res_name = merge(genes_tpm_filt, gencode_30, by="ensembl")
rownames(res_name) = res_name$ensembl
marker_expression = merge(res_name, markers, by ="symbol")
dim (marker_expression)
head (marker_expression)
metadata_filt_ordered <- metadata_filt[colnames(genes_tpm_filt),]
metadata_filt_ordered = metadata_filt_ordered[-1,]
all(rownames(metadata_filt_ordered) == colnames (genes_tpm_filt)) #TRUE
Stimulation <- metadata_filt_ordered$Stimulation

Expression of microglia, neuronal, astrocyte and oligodendrocyte markers in all 496 samples

rownames(marker_expression) = marker_expression$symbol
cell_type = marker_expression$Cell_type  # keep in same gene order 
marker_expression$ensembl = NULL
df_num = as.matrix(marker_expression[,2:497])
rownames(df_num) = marker_expression$symbol
cell_type = as.factor(cell_type)
# cell_type_colors <- data.frame(cell_type = levels(cell_type), color = I(brewer.pal(nlevels(cell_type), name = 'Dark2')))
cell_type_colors <- data.frame(cell_type = levels(cell_type), color = pal_lancet('lanonc')(nlevels(cell_type)))
cell_type_df = left_join(data.frame(cell_type = cell_type), cell_type_colors, by="cell_type")
res <- unlist(lapply(split(cell_type_colors$color, cell_type_colors$cell_type), unlist))
row_ha = rowAnnotation(`Cell Type` = as.factor(cell_type_df$cell_type), col = list(`Cell Type` = res), show_annotation_name = F)

# pdf(paste0(work_plots, "HM_markers_255s.pdf"), width = 10, height = 6)
colPalette <- colorRampPalette(rev(brewer.pal(10, "RdYlBu")))(496)
Heatmap(df_num,
        col = colPalette,
        name = "log2(TPM+1)", # Legend title
        cluster_rows = T,
        #  row_names_gp = gpar(fontsize = 6), # Text size for row names)
        show_column_dend = F, 
        show_column_names = F,
        show_row_names = T,
        right_annotation = row_ha)
#dev.off()

Heatmap with expression of markers per stimulation of all samples

rownames(df_num) = sapply(marker_expression$symbol,function(x) 
strsplit(as.character(x),split = "\\\\")[[1]][1])
df_num = t(df_num)
pos_df = data.frame("Stimulation" = metadata_filt_ordered$Stimulation)
rownames(pos_df) = rownames(df_num)
pheatmap(df_num, annotation_row = pos_df, main = "Glia and neuronal marker expression per stimulation", show_rownames = F, fontsize_col = 30, fontsize = 30, fontsize_row = 30, cluster_rows = T)